import os
import fool

def entity_spl(file):
    """Split the rows of a comma-separated node file into per-tag lists.

    Every usable row has exactly three comma-separated fields, the last of
    which is a one-letter tag: ``D`` (disease), ``B`` (body), ``C`` (check)
    or ``S`` (symbol). The second field of each such row is collected under
    its tag. Blank lines, rows with a different number of fields and rows
    with an unknown tag are skipped.

    The four lists are written, as their ``str()`` representation with single
    quotes replaced by double quotes, to ``disease.txt``, ``body.txt``,
    ``check.txt`` and ``symbol.txt`` in the current working directory.
    The disease list is printed to stdout before the files are written.

    Args:
        file: Path of the UTF-8 encoded input file.
    """
    disease = []
    body = []
    check = []
    symbol = []
    by_tag = {'D': disease, 'B': body, 'C': check, 'S': symbol}

    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            # Strip the line terminator first so that the final row is
            # recognised even when the file does not end with a newline.
            fields = line.rstrip('\n').split(',')
            # The tag must be the third and last field; this also skips
            # blank lines and short rows instead of raising IndexError.
            if len(fields) != 3:
                continue
            bucket = by_tag.get(fields[2])
            if bucket is not None:
                bucket.append(fields[1])

    print(disease)
    with open('disease.txt', 'w', encoding='utf-8') as f:
        # Disease names are upper-cased; the final replace turns `PAGET"`
        # back into `paget'` -- presumably to restore an apostrophe that the
        # quote replacement destroyed (TODO confirm the intended name).
        f.write(str(disease).upper().replace("'", '"').replace('PAGET"', "paget'"))
    with open('body.txt', 'w', encoding='utf-8') as f:
        f.write(str(body).replace("'", '"'))
    with open('check.txt', 'w', encoding='utf-8') as f:
        f.write(str(check).replace("'", '"'))
    with open('symbol.txt', 'w', encoding='utf-8') as f:
        f.write(str(symbol).replace("'", '"'))

# entity_spl('nodes.csv')

def vocabulary(file_path,vocab_path):
    """Build a token vocabulary from every file below a directory.

    Walks *file_path* recursively, reads each file as UTF-8 text and passes
    every line to ``fool.cut``. The result is iterated as a sequence of
    token sequences. Each distinct non-empty token (after removing newline
    characters) is assigned the next free integer id, starting at 0, in
    order of first appearance. The mapping is written to *vocab_path*, one
    ``<id>:<token>`` entry per line.

    Args:
        file_path: Root directory whose files are tokenised.
        vocab_path: Destination file; it is opened (and truncated) before
            the directory walk starts.
    """
    dic = {}
    with open(vocab_path, 'w', encoding='utf-8') as f:
        for roots, dirs, files in os.walk(file_path):
            for file in files:
                with open(os.path.join(roots, file), 'r', encoding='utf-8') as g:
                    for line in g:
                        for sentence in fool.cut(line):
                            for word in sentence:
                                word = word.replace('\n', '')
                                # len(dic) is the next unused id because ids
                                # are handed out consecutively from 0.
                                if word and word not in dic:
                                    dic[word] = len(dic)
        for key, value in dic.items():
            f.write(str(value) + ':' + str(key) + '\n')


# vocabulary('./jw/templates','./jw/vocabulary.txt')

# print(fool.cut('ds发生在哪些身体部位'))
# Read the file at the hard-coded path and print all of its lines as one
# string: each line loses its newline and is followed by a full-width comma
# (so the printed text also ends with one).
with open('H:/myself/PatholNLP/data/vocabulary/CHECK.txt','r',encoding='utf-8') as f:
    g = f.readlines()
    content = ''.join(i.replace('\n','') + '，' for i in g)
    print(content)
